import time

import pandas as pd
import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Secondary data processing; can be run from GraphFrame.py or on its own.
url = "./userListLink_demo_100.json"  # Path of the JSON file: the final output saved for the visualization part

userList = "./userList_demo_100_ID.csv"  # File holding the PageRank influence coefficients

userLink = "./userLink_demo_100.csv"  # Edge table


def setJson():
    """Build the graph JSON (nodes, links, categories) and write it to ``url``.

    Nodes come from the ``userList`` CSV: the ``pagerank`` column is converted
    to a stepped integer ``symbolSize``, ``occupation`` is renamed to ``value``
    and mapped to a numeric ``category``.  Links come from the ``src``/``dst``
    columns of the ``userLink`` CSV, renamed to ``source``/``target``.

    Side effects: saves two histograms (the rounded original pagerank values
    and the stepped sizes) via ``draw_distribution_histogram``, prints the
    node table and the link dtypes, and writes the JSON file.
    """
    # Occupation groups; the position of a group is its category index.
    # Anything that is not in the first four groups (the fifth group and any
    # unknown occupation) ends up in category 4.
    occupation_groups = (
        ("Politician", "Lawyer", "Blogger", "Performer", "Guide", "Director", "Business personal"),
        ("Engineer", "Student", "Retired military personal", "Coach", "Manager", "Singer",
         "Musician", "Securities analyst", "Chairman", "Photographer"),
        ("Freelance", "Athletes", "Police", "Accountant", "Teacher", "Doctor"),
        ("Soldier", "Programmer", "Nurse", "Fireman", "Construction worker", "Literature", "Aviation technician",
         "Physical scientist", "Biologist", "Architect"),
        ("Waiter", "Chemical technician", "Explorers", "Historian", "Maritime affairs", "Lyricist",
         "Composer", "Forest ranger"),
    )
    fallback_category = 4
    # Built once instead of re-creating and scanning five lists per row.
    # setdefault keeps the lowest category index should a name ever be
    # listed in two groups, matching the original if/elif priority.
    category_by_job = {}
    for category_index, group in enumerate(occupation_groups):
        for job in group:
            category_by_job.setdefault(job, category_index)

    def func(x):
        """Turn a bin upper edge into a symbol size; higher bands get a larger offset."""
        if x <= 30:
            offset = 2
        elif x <= 40:
            offset = 8
        elif x <= 50:
            offset = 16
        else:
            offset = 32
        return x / 5 + offset

    def getJobCategory(x):
        """Return the category index (0-4) of occupation ``x``."""
        return category_by_job.get(x, fallback_category)

    def Continuous_Discrete_Distribution():
        """Convert the continuous pagerank column into stepped symbol sizes.

        Returns the node DataFrame with ``symbolSize``, ``value`` and
        ``category`` columns.  A missing (NaN) pagerank still raises when the
        sizes are cast to ``int``.
        """
        df = pd.read_csv(userList)
        bins = list(range(0, 105, 5))
        original_rank = df["pagerank"].round(2)

        # Scale, then clamp into the bin range: without the clamp a scaled
        # value of 0 or above 100 falls outside every bin, pd.cut yields NaN
        # and the int cast below fails.  Clamped values land in the first /
        # last bin respectively.
        scaled = (df["pagerank"] * 40).clip(lower=bins[0], upper=bins[-1])
        bin_index = pd.cut(scaled, bins, labels=False, include_lowest=True)
        # bin index -> upper edge of the bin (5, 10, ..., 100) -> symbol size
        stepped_rank = (bin_index * 5 + 5).apply(func).astype(int)
        df["pagerank"] = stepped_rank

        df = df.rename(columns={"pagerank": "symbolSize", "occupation": "value"})
        df["category"] = df["value"].apply(getJobCategory)

        draw_distribution_histogram(original_rank, "原pageRank权重系数分布.jpg", 20, "Continuous distribution")
        draw_distribution_histogram(stepped_rank, "PageRank阶梯分布.jpg", 20, "Stepped distribution")
        print(df)
        return df

    nodes = Continuous_Discrete_Distribution()
    nodes["id"] = nodes["id"].astype(str)

    # Build the link table in one chain so no column is assigned into a
    # frame obtained by slicing another one.
    links = (
        pd.read_csv(userLink)[["src", "dst"]]
        .astype(str)
        .rename(columns={"src": "source", "dst": "target"})
    )
    print(links.dtypes)

    categories = pd.DataFrame({"name": ["A", "B", "C", "D", "E"]})
    final_json = {
        "nodes": nodes.to_dict("records"),
        "links": links.to_dict("records"),
        "categories": categories.to_dict("records"),
    }
    with open(url, "w", encoding="utf-8") as f_new:
        json.dump(final_json, f_new)


def setALScore():
    """Attach a randomised attitude score to every link row and save the table.

    For each ``guest`` in ``result_demo_100.csv`` the score is drawn from a
    normal distribution centred on three times the guest's pagerank (taken
    from the ``userList`` CSV) with standard deviation 0.5, rounded to two
    decimals.  The result is printed, written to ``ALS_score.csv`` and a
    histogram of the scores is saved as ``ALS_score.jpg``.

    Raises:
        IndexError: if a guest has no row in the ``userList`` CSV (same
            exception type as the original positional lookup raised).
    """
    dfRank = pd.read_csv(userList)

    # One name -> pagerank table built up front instead of filtering the
    # whole DataFrame for every link row.  Rows without a name can never
    # match a guest, and for duplicated names the first row wins, exactly as
    # the original ``.values[0]`` lookup did.
    ranked = dfRank.dropna(subset=["name"]).drop_duplicates(subset="name", keep="first")
    pagerank_by_name = dict(zip(ranked["name"], ranked["pagerank"]))

    def getAttitude(name):
        """Return the attitude score of ``name`` as a two-decimal string."""
        try:
            pagerank = pagerank_by_name[name]
        except KeyError:
            raise IndexError(f"guest {name!r} not found in {userList}") from None
        score = np.random.normal(pagerank * 3, 0.5)
        return format(score, '.2f')

    def getALSScoreImage():
        """Save the frequency histogram of the scores stored in ALS_score.csv."""
        url = "./ALS_score.csv"
        df = pd.read_csv(url)
        df1 = df["attitude"]
        path1 = "ALS_score.jpg"
        draw_distribution_histogram(df1, path1, 20, "ALS_Score distribution")

    dfLink = pd.read_csv("result_demo_100.csv")
    dfLink["attitude"] = dfLink["guest"].apply(getAttitude).astype("float")
    print(dfLink)
    dfLink.to_csv("ALS_score.csv", index=False)
    # to_csv has written and closed the file by the time it returns, so the
    # file can be read back immediately; no sleep is needed.
    getALSScoreImage()

# Plotting helper: makes the difference between the processed data and the
# source data visible at a glance.
def draw_distribution_histogram(nums, path, bins, name, is_hist=True, is_kde=True, is_rug=False, is_vertical=False,
                                is_norm_hist=False, xlabel="PageRank values", ylabel="Frequency"):
    """Plot the distribution of ``nums`` and save it as an image file.

    Args:
        nums: the observations to plot.
        path: output path of the image.
        bins: number of histogram bars.
        name: title of the plot.
        is_hist: whether to draw the histogram.
        is_kde: whether to draw the kernel density estimate.
        is_rug: whether to draw a small tick for every observation.
        is_vertical: if True, the observed values are on the y-axis.
        is_norm_hist: if True, the histogram height shows a density rather
            than a count; when the KDE is drawn this is necessarily the case.
        xlabel: x-axis label (defaults to the previously hard-coded text).
        ylabel: y-axis label (defaults to the previously hard-coded text).

    Returns:
        None.  The figure is written to ``path`` and then closed.
    """
    sns.set()  # switch to seaborn's default theme
    try:
        # NOTE(review): seaborn documents distplot as deprecated in favour of
        # histplot/displot; kept here so the rendered output does not change.
        sns.distplot(nums, bins=bins, hist=is_hist, kde=is_kde, rug=is_rug,
                     hist_kws={"color": "steelblue"}, kde_kws={"color": "purple"},
                     vertical=is_vertical, norm_hist=is_norm_hist)

        plt.xlabel(xlabel)
        plt.ylabel(ylabel)

        # Add the title
        plt.title(name)
        plt.tight_layout()  # avoid labels being cut off
        plt.savefig(path, dpi=300)
    finally:
        # Always release the figure; otherwise a failed plot/save would leave
        # it active and the next call would draw on top of it.
        plt.close()


if __name__ == '__main__':
    # Build the JSON file consumed by the ECharts visualization
    setJson()
    # Generate the adjusted ALS score table
    setALScore()
